getwd()
## [1] "/Users/abigailkessel/Documents/MEJO570F22/Final"
pacman::p_load("tidyverse","Hmisc", "skimr", "lubridate", "janitor")
# Import and view data for Morgan expenditures.
# The file's first line is a one-cell title ("EXPENDITURES"), so readr names
# the remaining columns ...2, ...3, etc. and reads everything as character.
# The real header sits in the first data row and is promoted in the cleaning
# step that follows.
mexp <- read_csv("Morgan_Expenditures.csv")
## New names:
## Rows: 184 Columns: 23
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (23): EXPENDITURES, ...2, ...3, ...4, ...5, ...6, ...7, ...8, ...9, ...1...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
# Clean the raw Morgan expenditure import in a single pipeline.
mexp <- mexp %>%
  # the first data row holds the real column headers
  row_to_names(row_number = 1) %>%
  # convert the headers to snake_case
  clean_names() %>%
  # the money columns arrive as text; make them numeric
  mutate(
    amount1 = as.numeric(amount1),
    sum_to_date = as.numeric(sum_to_date)
  ) %>%
  # drop every column that is NA in all rows
  # (the first version of this script removed amount, office_sought,
  # declaration and candidate by name with subset(); dropping all-NA columns
  # generically replaces that)
  select(where(~ !all(is.na(.x)))) %>%
  # rename amount1 to amount; presumably this has to come after the empty
  # columns are dropped because an all-NA `amount` column exists until then
  # -- TODO confirm
  rename(amount = amount1)
head(mexp)
## # A tibble: 6 × 19
## date name stree…¹ stree…² city state full_…³ count…⁴ outsi…⁵ profe…⁶
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 7/18/22 Aggregate… <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2 8/2/22 Aggregate… <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 3 8/2/22 Aggregate… <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 4 8/8/22 Aggregate… <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 5 9/2/22 Aggregate… <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 6 9/2/22 Aggregate… <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## # … with 9 more variables: employer_name <chr>, purpose_type_code <chr>,
## # purpose <chr>, expenditure_type_desc <chr>, account_abbr <chr>,
## # form_of_payment_desc <chr>, description <chr>, amount <dbl>,
## # sum_to_date <dbl>, and abbreviated variable names ¹street_1, ²street_2,
## # ³full_zip, ⁴country_name, ⁵outside_us_postal_code, ⁶profession
# Morgan donations.
# Same layout as the expenditure export: a one-cell title row ("RECEIPTS")
# comes first, so the real header is in the first data row and every column
# is read as character.
mdon <- read_csv("Morgan_Donations.csv")
## New names:
## Rows: 1985 Columns: 19
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (19): RECEIPTS, ...2, ...3, ...4, ...5, ...6, ...7, ...8, ...9, ...10, ....
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
# Clean the raw Morgan donation import in a single pipeline.
mdon <- mdon %>%
  # the first data row holds the real column headers
  row_to_names(row_number = 1) %>%
  # convert the headers to snake_case
  clean_names() %>%
  # the money columns arrive as text; make them numeric
  mutate(
    sum_to_date = as.numeric(sum_to_date),
    amount = as.numeric(amount)
  ) %>%
  # remove all columns that contain only NA values; same idea as
  # https://stackoverflow.com/questions/2643939/remove-columns-from-dataframe-where-all-values-are-na
  select(where(~ !all(is.na(.x)))) %>%
  # keep only the first five characters of the ZIP code
  mutate(full_zip = str_sub(full_zip, 1, 5))
tail(mdon)
## # A tibble: 6 × 17
## date name stree…¹ stree…² city state full_…³ count…⁴ outsi…⁵ profe…⁶
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 9/24/22 Emily Zi… 3601 K… <NA> Char… NC 28226 United… US Museum…
## 2 7/11/22 Phyllis … 801 At… <NA> Caro… NC 28428 United… US Not Em…
## 3 8/11/22 Phyllis … 801 At… <NA> Caro… NC 28428 United… US Not Em…
## 4 9/11/22 Phyllis … 801 At… <NA> Caro… NC 28428 United… US Not Em…
## 5 10/11/22 Phyllis … 801 At… <NA> Caro… NC 28428 United… US Not Em…
## 6 10/14/22 Phyllis … 801 At… <NA> Caro… NC 28428 United… US Not Em…
## # … with 7 more variables: employers_name <chr>, receipt_type_desc <chr>,
## # account_abbr <chr>, form_of_payment_desc <chr>, description <chr>,
## # amount <dbl>, sum_to_date <dbl>, and abbreviated variable names ¹street_1,
## # ²street_2, ³full_zip, ⁴country_name, ⁵outside_us_postal_code, ⁶profession
# Import and view Lee donation data.
# The file starts with a one-cell title row ("RECEIPTS"), so the real header
# is in the first data row and every column is read as character.
ldon <- read_csv("Lee_Donations.csv")
## New names:
## Rows: 260 Columns: 19
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (19): RECEIPTS, ...2, ...3, ...4, ...5, ...6, ...7, ...8, ...9, ...10, ....
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
# Clean the raw Lee donation import in a single pipeline.
ldon <- ldon %>%
  # the first data row holds the real column headers
  row_to_names(row_number = 1) %>%
  # convert the headers to snake_case
  clean_names() %>%
  # drop every column that is NA in all rows
  select(where(~ !all(is.na(.x)))) %>%
  mutate(
    # the money columns arrive as text; make them numeric
    sum_to_date = as.numeric(sum_to_date),
    amount = as.numeric(amount),
    # Keep only the 5-digit ZIP, exactly as is done for Morgan's donations,
    # so both candidates are grouped, filtered by district and joined to the
    # ZCTA polygons on the same key. This is a no-op for values that are
    # already 5 digits.
    full_zip = str_sub(full_zip, 1, 5)
  )
# TODO: donor names print in upper case here but in mixed case in the Morgan
# file; normalise them if the two tables are ever combined by name.
head(ldon)
## # A tibble: 6 × 18
## date is_prior name stree…¹ stree…² city state full_…³ count…⁴ profe…⁵
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 8/25/22 <NA> Aggregat… <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2 8/30/22 <NA> Aggregat… <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 3 9/6/22 <NA> Aggregat… <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 4 9/13/22 <NA> Aggregat… <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 5 9/16/22 <NA> Aggregat… <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 6 9/16/22 <NA> Aggregat… <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## # … with 8 more variables: employers_name <chr>, purpose <chr>,
## # receipt_type_desc <chr>, account_abbr <chr>, form_of_payment_desc <chr>,
## # description <chr>, amount <dbl>, sum_to_date <dbl>, and abbreviated
## # variable names ¹street_1, ²street_2, ³full_zip, ⁴country_name, ⁵profession
# Import and view Lee expenditures data.
# The file starts with a one-cell title row ("EXPENDITURES"), so the real
# header is in the first data row and every column is read as character.
lexp <- read_csv("Lee_Expenditures.csv")
## New names:
## Rows: 165 Columns: 23
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (23): EXPENDITURES, ...2, ...3, ...4, ...5, ...6, ...7, ...8, ...9, ...1...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
# Clean the raw Lee expenditure import in a single pipeline.
lexp <- lexp %>%
  # the first data row holds the real column headers
  row_to_names(row_number = 1) %>%
  # convert the headers to snake_case
  clean_names() %>%
  # the money columns arrive as text; make them numeric
  mutate(
    amount1 = as.numeric(amount1),
    sum_to_date = as.numeric(sum_to_date)
  ) %>%
  # drop every column that is NA in all rows
  select(where(~ !all(is.na(.x)))) %>%
  # rename amount1 to amount
  rename(amount = amount1)
head(lexp)
## # A tibble: 6 × 17
## date name stree…¹ stree…² city state full_…³ count…⁴ profe…⁵ emplo…⁶
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 7/11/22 Aggregate… <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2 7/19/22 Aggregate… <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 3 8/11/22 Aggregate… <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 4 8/13/22 Aggregate… <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 5 8/19/22 Aggregate… <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 6 8/26/22 Aggregate… <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## # … with 7 more variables: purpose_type_code <chr>, purpose <chr>,
## # expenditure_type_desc <chr>, account_abbr <chr>,
## # form_of_payment_desc <chr>, amount <dbl>, sum_to_date <dbl>, and
## # abbreviated variable names ¹street_1, ²street_2, ³full_zip, ⁴country_name,
## # ⁵profession, ⁶employer_name
# Import and cleaning are done -- the actual analysis starts here.
# Donations questions
# 1. Total dollars raised by each candidate.
mtotal <- sum(mdon[["amount"]])
ltotal <- sum(ldon[["amount"]])
ltotal
## [1] 1339059
mtotal
## [1] 1566334
# Morgan's fundraising lead over Lee, in dollars.
mtotal - ltotal
## [1] 227275.7
2.Summary Lee Donations
summary(ldon$amount)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15 275 1000 5170 2900 275000
#interesting how lee donations are rounded off and morgan donations include the cents
2.5 Summary Morgan Donations
summary(mdon$amount)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 10.0 25.0 789.5 100.0 230000.0
#Analysis Thus Far
Morgan raised $227,275 more than Lee. Both raised over a million dollars – makes me wonder if this was a particularly high amount. The minimum donation for Lee is $15, while for Morgan it’s $1. Lee’s median donation is $1,000, which is way higher than Morgan’s at $25. Both have mean donations way higher than the median, which signals that there are a few huge donations as outliers.
Questions Raised: How many donations per candidate? Is this typical? Most common donors
# 3. Number of donation records (rows) per candidate.
nrow_mdon <- dim(mdon)[1]
nrow_ldon <- dim(ldon)[1]
nrow_ldon
## [1] 259
nrow_mdon
## [1] 1984
# How many times more donation records Morgan has than Lee.
nrow_mdon/nrow_ldon
## [1] 7.660232
7.6 times more donations to Morgan!!
# 4. Repeat donors for Lee: per donor name, the number of donations (Total)
# and the dollars given (Amount), most frequent donors first.
lee_repeats <- arrange(
  summarise(
    group_by(ldon, name),
    Total = n(),
    Amount = sum(amount)
  ),
  desc(Total)
)
lee_repeats
## # A tibble: 203 × 3
## name Total Amount
## <chr> <int> <dbl>
## 1 NORTH CAROLINA SENATE MAJORITY FUND 26 1000406.
## 2 Aggregated Individual Contribution 20 814
## 3 BRETT TANNER 3 1500
## 4 PARTH BADHIWALA 3 225
## 5 CENTENE CORPORATION PAC 2 2800
## 6 CHRISTOPHER LOUTIT 2 2600
## 7 CHRISTY TANNER 2 3800
## 8 DANIEL NEWTON 2 350
## 9 FRANKLIN ROUSE 2 1525
## 10 LOUIE J MITCHELL 2 100
## # … with 193 more rows
#8 people donated twice, 2 donated 3 times
4.5 Repeat Donors Morgan
# Repeat donors for Morgan: per donor name, the number of donations (Total)
# and the dollars given (Amount), most frequent donors first.
morgan_repeats <- arrange(
  summarise(
    group_by(mdon, name),
    Total = n(),
    Amount = sum(amount)
  ),
  desc(Total)
)
morgan_repeats
## # A tibble: 565 × 3
## name Total Amount
## <chr> <int> <dbl>
## 1 Aggregated Individual Contribution 1117 19144.
## 2 North Carolina Democratic Party 28 1315490.
## 3 Marcia Ruth Morgan 13 1271.
## 4 Carol M. Sibley 7 535
## 5 Rosemary Freeman 6 2300
## 6 Alina Szmant 5 450
## 7 Amy DeLoach 5 450
## 8 Joshua Smith 5 267.
## 9 Marla Barthen 5 225
## 10 Martha C. Armstrong 5 350
## # … with 555 more rows
# 5. Lee's money from everyone except his party committee.
# The committee's total is computed from the data instead of being typed in
# by hand, so it cannot go stale if the CSV is refreshed.
# NOTE(review): this removes only donations made under this one donor name;
# the "Party Contribution" receipt type sums to a slightly larger figure
# (one extra record) -- confirm which definition of "party money" is intended.
l_party_total <- sum(
  ldon$amount[ldon$name %in% "NORTH CAROLINA SENATE MAJORITY FUND"]
)
l_nonparty <- ltotal - l_party_total
l_nonparty
## [1] 338653
# 338653 from non party for Lee!
# Morgan's money from everyone except her party committee.
# The committee's total is computed from the data instead of being typed in
# by hand, so it cannot go stale if the CSV is refreshed.
# NOTE(review): this removes only donations made under this one donor name;
# the "Party Contribution" receipt type sums to a slightly larger figure
# (two extra records) -- confirm which definition of "party money" is intended.
m_party_total <- sum(
  mdon$amount[mdon$name %in% "North Carolina Democratic Party"]
)
m_nonparty <- mtotal - m_party_total
m_nonparty
## [1] 250844.1
# 250844 from non-party sources for Morgan!
# Gap between Lee's and Morgan's non-party money, in dollars.
l_nonparty - m_nonparty
## [1] 87808.82
Lee got 338k in non-party donations, while Morgan got 250k.
# 6. Lee's receipts by receipt type: number of records (Total) and dollars
# (Amount) per type.
lcontributions <- ldon %>%
  group_by(receipt_type_desc) %>%
  summarise(Total = n(), Amount = sum(amount))
lcontributions
## # A tibble: 4 × 3
## receipt_type_desc Total Amount
## <chr> <int> <dbl>
## 1 Individual Contribution 193 225210.
## 2 Other Political Committee Contribution 38 107822
## 3 Party Contribution 27 1006006.
## 4 Refund/Reimbursement to the Committee 1 21.4
6.5 Morgan
# Morgan's receipts by receipt type: number of records (Total) and dollars
# (Amount) per type.
mcontributions <- mdon %>%
  group_by(receipt_type_desc) %>%
  summarise(Total = n(), Amount = sum(amount))
mcontributions
## # A tibble: 4 × 3
## receipt_type_desc Total Amount
## <chr> <int> <dbl>
## 1 Individual Contribution 1927 201348.
## 2 Other Political Committee Contribution 25 48674.
## 3 Party Contribution 30 1316240.
## 4 Refund/Reimbursement to the Committee 2 72.3
# Chart data: dollars per receipt type for each candidate, in long format.
# The three receipt types to plot (refunds/reimbursements are left out).
receipts <- c(
  "Individual Contribution",
  "Other Political Committee Contribution",
  "Party Contribution"
)
# Look the amounts up in the per-type summaries rather than retyping the
# printed totals, so the chart always matches the tables. match() keeps the
# values in the same order as `receipts`.
Lee <- lcontributions$Amount[match(receipts, lcontributions$receipt_type_desc)]
Morgan <- mcontributions$Amount[match(receipts, mcontributions$receipt_type_desc)]
df1 <- data.frame(Lee, Morgan, receipts)
# One row per receipt type and candidate, which is the shape ggplot needs for
# a dodged bar chart.
df2 <- tidyr::pivot_longer(
  df1,
  cols = c("Lee", "Morgan"),
  names_to = "Candidate",
  values_to = "value"
)
head(df2)
## # A tibble: 6 × 3
## receipts Candidate value
## <chr> <chr> <dbl>
## 1 Individual Contribution Lee 225210.
## 2 Individual Contribution Morgan 201348.
## 3 Other Political Committee Contribution Lee 107822
## 4 Other Political Committee Contribution Morgan 48674.
## 5 Party Contribution Lee 1006006.
## 6 Party Contribution Morgan 1316240.
# Side-by-side bars of dollars raised per receipt type, one bar per candidate.
ggplot(df2, aes(x = receipts, y = value, fill = Candidate)) +
  # make sure the y axis runs from 0 up to 1.5 million
  expand_limits(y = c(0, 1500000)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity")
  geom_col(position = "dodge") +
  theme_classic() +
  # tilt the long receipt-type labels slightly
  theme(axis.text.x = element_text(angle = 10, vjust = 1, hjust = 1)) +
  labs(
    title = "Campaign Donations Categorized by Receipt Description",
    x = "Receipt Type",
    y = "Amount in Dollars"
  )
#Analysis Now:
Morgan had about 310k more party money, but about 59k less PAC money. She had about 10 times as many individual contributions (1,927 vs. 193), yet had less individual donation money overall by about 24k. Lee got 338k in non-party donations, while Morgan got 250k. Morgan also has a bunch more repeat donors. Almost no one donates to Lee twice, but that’s probably because they give big money upfront.
# Lee's itemised contributions from other political committees (PACs).
ldon %>% filter(receipt_type_desc == "Other Political Committee Contribution")
## # A tibble: 38 × 18
## date is_prior name stree…¹ stree…² city state full_…³ count…⁴ profe…⁵
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 10/6/22 <NA> ANESTH… PO BOX… <NA> GREE… NC 27404 United… <NA>
## 2 10/3/22 <NA> BLUE C… 5901 C… <NA> DURH… NC 27707 United… <NA>
## 3 7/26/22 <NA> BRENT … 2924 E… <NA> AUTR… NC 28318 United… <NA>
## 4 8/11/22 <NA> CENTEN… 770 FO… <NA> ST L… MO 63105 United… <NA>
## 5 10/3/22 <NA> CENTEN… 770 FO… <NA> ST L… MO 63105 United… <NA>
## 6 10/17/22 <NA> CHARTE… 150 FA… SUITE … RALE… NC 27601 United… <NA>
## 7 8/25/22 <NA> DAVE C… PO BOX… <NA> ASHB… NC 27204 United… <NA>
## 8 9/6/22 <NA> DUKE E… 550 SO… <NA> CHAR… NC 28202 United… <NA>
## 9 8/16/22 <NA> EAST C… 2080 B… <NA> GREE… NC 27834 United… <NA>
## 10 10/7/22 <NA> ECEPAC 3710 S… <NA> WILM… NC 28403 United… <NA>
## # … with 28 more rows, 8 more variables: employers_name <chr>, purpose <chr>,
## # receipt_type_desc <chr>, account_abbr <chr>, form_of_payment_desc <chr>,
## # description <chr>, amount <dbl>, sum_to_date <dbl>, and abbreviated
## # variable names ¹street_1, ²street_2, ³full_zip, ⁴country_name, ⁵profession
This gives me strong incumbent vibes. He’s got the support of a lot of different professional groups.
6.5 Morgan PACS
mdon %>% filter(receipt_type_desc == "Other Political Committee Contribution")
## # A tibble: 25 × 17
## date name stree…¹ stree…² city state full_…³ count…⁴ outsi…⁵ profe…⁶
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 10/17/22 Back to… 1924 S… <NA> Wilm… NC 28401 United… US <NA>
## 2 10/11/22 COMMITT… PO Box… <NA> Hill… NC 27278 United… US <NA>
## 3 10/12/22 COMMITT… PO Box… <NA> Hill… NC 27278 United… US <NA>
## 4 10/22/22 Elect D… 401 S … <NA> Wilm… NC 28401 United… US <NA>
## 5 9/12/22 ELECT P… PO Box… <NA> Wilm… NC 28402 United… US <NA>
## 6 7/29/22 ELIZABE… 301 Hi… Ste 950 Rale… NC 27603 United… US <NA>
## 7 9/9/22 Emily's… 1800 M… Ste 37… Wash… DC 20036 United… US <NA>
## 8 8/3/22 JASON M… PO Box… <NA> Wilm… NC 28402 United… US <NA>
## 9 9/12/22 Lillian… 3117 P… Ste 130 Rale… NC 27604 United… US <NA>
## 10 10/22/22 Lillian… 3117 P… Ste 130 Rale… NC 27604 United… US <NA>
## # … with 15 more rows, 7 more variables: employers_name <chr>,
## # receipt_type_desc <chr>, account_abbr <chr>, form_of_payment_desc <chr>,
## # description <chr>, amount <dbl>, sum_to_date <dbl>, and abbreviated
## # variable names ¹street_1, ²street_2, ³full_zip, ⁴country_name,
## # ⁵outside_us_postal_code, ⁶profession
# Lee donations by ZIP code: number of donations (Total) and dollars (Amount)
# per ZIP, largest amount first.
# Rows without a ZIP are removed up front by testing full_zip only. The
# earlier na.omit() on the summary dropped a row if *any* column was NA, so a
# ZIP whose amount could not be summed would have vanished silently.
ldon_zips <- ldon %>%
  filter(!is.na(full_zip)) %>%
  group_by(full_zip) %>%
  summarise(
    Total = n(),
    Amount = sum(amount)
  ) %>%
  arrange(desc(Amount))
ldon_zips
## # A tibble: 83 × 3
## full_zip Total Amount
## <chr> <int> <dbl>
## 1 27615 26 1000406.
## 2 28403 28 50467.
## 3 28409 28 29800
## 4 28480 9 15900
## 5 28401 7 13450
## 6 28411 10 12400
## 7 28451 3 11450
## 8 28412 4 7300
## 9 27624 2 6600
## 10 28405 19 6575
## # … with 73 more rows
Morgan Zip Codes
# Morgan donations by ZIP code: number of donations (Total) and dollars
# (Amount) per ZIP, largest amount first.
# Rows without a ZIP are removed up front by testing full_zip only. The
# earlier na.omit() on the summary dropped a row if *any* column was NA, so a
# ZIP whose amount could not be summed would have vanished silently.
mdon_zips <- mdon %>%
  filter(!is.na(full_zip)) %>%
  group_by(full_zip) %>%
  summarise(
    Total = n(),
    Amount = sum(amount)
  ) %>%
  arrange(desc(Amount))
mdon_zips
## # A tibble: 146 × 3
## full_zip Total Amount
## <chr> <int> <dbl>
## 1 27611 30 1316240.
## 2 28401 50 16913.
## 3 28403 62 12580.
## 4 28409 101 12138.
## 5 27513 4 11460
## 6 28480 15 9450
## 7 28412 64 9113
## 8 27603 5 8439.
## 9 27517 17 8250
## 10 28411 52 8000
## # … with 136 more rows
# District 7 ZIP codes.
# Stored as character because ZIP codes are identifiers, not quantities, and
# the full_zip columns they are compared against are character. This avoids
# relying on implicit number-to-text coercion inside %in%.
d7_zipcodes <- c(
  "28403", "28412", "28411", "28409",
  "28405", "28401", "28428", "28429",
  "28480", "28449", "28402", "28404",
  "28407", "28406", "28408", "28410"
)
How much Morgan money came from the district itself?
# Keep only the per-ZIP rows whose ZIP lies in District 7, then add up the
# dollars Morgan received from them.
mdon_district_zips <- filter(mdon_zips, full_zip %in% d7_zipcodes)
mdistrictmoney <- sum(mdon_district_zips[["Amount"]])
mdistrictmoney
## [1] 87871.66
# Percent of Morgan's non-party money that came from District 7 ZIP codes.
# NOTE(review): mdistrictmoney sums every receipt type filed from those ZIPs,
# while the denominator excludes party money -- confirm no party money is
# filed from an in-district ZIP.
mdistrictmoney/m_nonparty * 100
## [1] 35.03038
How much money isn’t from the district
m_nonparty - mdistrictmoney
## [1] 162972.5
(m_nonparty - mdistrictmoney)/m_nonparty * 100
## [1] 64.96962
How much Lee Money came from the district itself?
# Keep only the per-ZIP rows whose ZIP lies in District 7, then add up the
# dollars Lee received from them.
ldon_district_zips <- filter(ldon_zips, full_zip %in% d7_zipcodes)
ldistrictmoney <- sum(ldon_district_zips[["Amount"]])
ldistrictmoney
## [1] 147442
# Percent of Lee's non-party money that came from District 7 ZIP codes.
# NOTE(review): ldistrictmoney sums every receipt type filed from those ZIPs,
# while the denominator excludes party money -- confirm no party money is
# filed from an in-district ZIP.
ldistrictmoney/l_nonparty * 100
## [1] 43.53777
From outside the district?
l_nonparty - ldistrictmoney
## [1] 191211
(l_nonparty - ldistrictmoney)/l_nonparty * 100
## [1] 56.46223
#Chart 1 – Making a map of Donors for Morgan
#most of the map stuff is from this website https://rpubs.com/richmajerus/508643
library(leaflet)
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
# Download the ZCTA (ZIP Code Tabulation Area) polygons whose codes start with
# 27 or 28.
# - Cache the shapefile so later runs do not download it again (tigris itself
#   suggests this option in its start-up message).
# - Pin year = 2020: the join key used later, GEOID20, only exists in the
#   2020 vintage, and 2020 is what the unpinned call was already retrieving.
# - progress_bar = FALSE keeps the download progress bar out of knitted output.
options(tigris_use_cache = TRUE)
zipcodes <- zctas(
  cb = TRUE,
  starts_with = c("27", "28"),
  year = 2020,
  progress_bar = FALSE
)
## Retrieving data for the year 2020
## ZCTAs can take several minutes to download. To cache the data and avoid re-downloading in future R sessions, set `options(tigris_use_cache = TRUE)`
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================= | 34%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================ | 64%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
zipcodes
## Simple feature collection with 853 features and 7 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -84.32187 ymin: 33.84232 xmax: -75.46062 ymax: 36.58812
## Geodetic CRS: NAD83
## First 10 features:
## ZCTA5CE20 AFFGEOID20 GEOID20 NAME20 LSAD20 ALAND20 AWATER20
## 37 28675 860Z200US28675 28675 28675 Z5 278080233 2102802
## 39 27885 860Z200US27885 27885 27885 Z5 321210143 279145367
## 213 28104 860Z200US28104 28104 28104 Z5 76670248 862810
## 214 28645 860Z200US28645 28645 28645 Z5 852362658 1110486
## 215 27886 860Z200US27886 27886 27886 Z5 495940388 2455465
## 216 27312 860Z200US27312 27312 27312 Z5 442692725 18621716
## 217 27332 860Z200US27332 27332 27332 Z5 277343354 6161943
## 218 28713 860Z200US28713 28713 28713 Z5 361508289 14838246
## 219 28164 860Z200US28164 28164 28164 Z5 106199983 1155473
## 220 27412 860Z200US27412 27412 27412 Z5 769445 10369
## geometry
## 37 MULTIPOLYGON (((-81.2733 36...
## 39 MULTIPOLYGON (((-76.28151 3...
## 213 MULTIPOLYGON (((-80.78143 3...
## 214 MULTIPOLYGON (((-81.75579 3...
## 215 MULTIPOLYGON (((-77.67312 3...
## 216 MULTIPOLYGON (((-79.38288 3...
## 217 MULTIPOLYGON (((-79.2884 35...
## 218 MULTIPOLYGON (((-83.69561 3...
## 219 MULTIPOLYGON (((-81.02518 3...
## 220 MULTIPOLYGON (((-79.81516 3...
# Attach Morgan's per-ZIP donation totals to the ZCTA polygons.
# A plain dplyr left join replaces tigris::geo_join(), which warns that the
# dplyr join functions should be used instead. Every polygon is kept; ZCTAs
# with no donations get NA for Total and Amount.
# NOTE(review): this relies on sf's left_join method so the result stays an
# sf object -- confirm the map still renders after the switch.
char_zips <- dplyr::left_join(
  zipcodes,
  mdon_zips,
  by = c("GEOID20" = "full_zip")
)
## Warning: We recommend using the dplyr::*_join() family of functions instead.
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# Interactive choropleth of Morgan's donations per ZIP code.

# Green colour scale spanning the donated amounts.
pal <- colorNumeric(
  palette = "Greens",
  domain = char_zips$Amount
)

# One HTML hover label per polygon: the ZIP code and the dollar amount.
labels <-
  paste0(
    "Zip Code: ",
    char_zips$GEOID20, "<br/>",
    "Amount: ",
    scales::dollar(char_zips$Amount)
  ) %>%
  lapply(htmltools::HTML)

# Reproject to EPSG:4326 (longitude/latitude) before handing the polygons to
# leaflet. The row order is unchanged, so `labels` still lines up.
char_zips <- sf::st_transform(char_zips, crs = 4326)

mdonations_map <- leaflet(char_zips) %>%
  # add base map
  addProviderTiles("CartoDB") %>%
  # initial view: longitude, latitude, zoom level
  setView(-79.055847, 35.913200, zoom = 7) %>%
  # add zip codes
  addPolygons(
    fillColor = ~pal(Amount),
    weight = 1,
    opacity = 1,
    color = "white",
    fillOpacity = 0.7,
    # spelled out in full: `highlight =` only worked through partial
    # matching of the `highlightOptions` argument
    highlightOptions = highlightOptions(
      weight = 2,
      color = "#666",
      fillOpacity = 0.7,
      bringToFront = TRUE
    ),
    label = labels
  ) %>%
  addLegend(
    pal = pal,
    values = ~Amount,
    opacity = 0.7,
    title = htmltools::HTML("Amount Donated per Zip Code"),
    position = "bottomright"
  )
mdonations_map
# Making a map for Lee donations.
# Attach Lee's per-ZIP donation totals to the ZCTA polygons.
# A plain dplyr left join replaces tigris::geo_join(), which warns that the
# dplyr join functions should be used instead. Every polygon is kept; ZCTAs
# with no donations get NA for Total and Amount.
# NOTE(review): this relies on sf's left_join method so the result stays an
# sf object -- confirm the map still renders after the switch.
lee_char_zips <- dplyr::left_join(
  zipcodes,
  ldon_zips,
  by = c("GEOID20" = "full_zip")
)
## Warning: We recommend using the dplyr::*_join() family of functions instead.
# Interactive choropleth of Lee's donations per ZIP code.

# Green colour scale spanning the donated amounts.
pal2 <- colorNumeric(
  palette = "Greens",
  domain = lee_char_zips$Amount
)

# One HTML hover label per polygon: the ZIP code and the dollar amount.
# Both pieces now come from lee_char_zips. The ZIP was previously read from
# char_zips (Morgan's table), a copy-paste slip that made this map depend on
# the other candidate's object and its row order.
labels2 <-
  paste0(
    "Zip Code: ",
    lee_char_zips$GEOID20, "<br/>",
    "Amount: ",
    scales::dollar(lee_char_zips$Amount)
  ) %>%
  lapply(htmltools::HTML)

# Reproject to EPSG:4326 (longitude/latitude) before handing the polygons to
# leaflet. The row order is unchanged, so `labels2` still lines up.
lee_char_zips <- sf::st_transform(lee_char_zips, crs = 4326)

ldonations_map <- leaflet(lee_char_zips) %>%
  # add base map
  addProviderTiles("CartoDB") %>%
  # initial view: longitude, latitude, zoom level
  setView(-79.055847, 35.913200, zoom = 7) %>%
  # add zip codes
  addPolygons(
    fillColor = ~pal2(Amount),
    weight = 1,
    opacity = 1,
    color = "white",
    fillOpacity = 0.7,
    # spelled out in full: `highlight =` only worked through partial
    # matching of the `highlightOptions` argument
    highlightOptions = highlightOptions(
      weight = 2,
      color = "#666",
      fillOpacity = 0.7,
      bringToFront = TRUE
    ),
    label = labels2
  ) %>%
  addLegend(
    pal = pal2,
    values = ~Amount,
    opacity = 0.7,
    title = htmltools::HTML("Amount Donated per Zip Code"),
    position = "bottomright"
  )
ldonations_map
# Pivoting to expenditures.
# None of this made it into the final piece: I was looking for anything
# questionable in the spending and found nothing -- the money overwhelmingly
# went to media and marketing.
# Total spent by Lee.
sum(lexp$amount)
## [1] 1145966
sum(mexp$amount)
## [1] 1272916
Morgan spent about 127k more than Lee. She had more money after all.
# Lee's spending by stated purpose, largest amount first. Only the dollar
# total is kept; the per-purpose row count is deliberately left out.
lpurposes <- arrange(
  summarise(
    group_by(lexp, purpose),
    Amount = sum(amount)
  ),
  desc(Amount)
)
lpurposes
## # A tibble: 46 × 2
## purpose Amount
## <chr> <dbl>
## 1 MEDIA BUY 1026409.
## 2 DIGITAL MEDIA BUY 30774.
## 3 <NA> 28649.
## 4 PRODUCTION FOR MEDIA 14000
## 5 PRODUCTION FOR MEDIA 9500
## 6 DIGITAL BILLBAORD BUY 8120
## 7 FOOD AND DRINK FOR FUNDRAISER 7846.
## 8 PRINT MATERIAL 3392.
## 9 SOCAIL MEDIA MARKETING 2000
## 10 PRINTING - TSHRTS 1821.
## # … with 36 more rows
You can just NA your operating expenses? what the heck!!! – after further research he gave everything a purpose code so I guess that’s ok??
2.5 Who exactly is Lee giving money to?
# Lee's payees: number of payments (Total) and dollars paid (Amount) per
# recipient, largest amount first.
lexp %>%
  group_by(name) %>%
  summarise(Total = n(), Amount = sum(amount)) %>%
  arrange(desc(Amount))
## # A tibble: 28 × 3
## name Total Amount
## <chr> <int> <dbl>
## 1 PEOPLE WHO THINK LLC 6 1013549
## 2 FACEBOOK/META 51 43929.
## 3 FIRELUX LLC 2 23500
## 4 CHASE HORTON 8 14000
## 5 LAMAR ADVERTISING 1 8120
## 6 SOCIALRY MARKETING 4 8000
## 7 MICHAEL LEE 1 7846.
## 8 CINDY KUHNE 4 7010
## 9 NEW HANOVER PRINTING & PUBLISHING 4 5119.
## 10 PORT CITY DAILY 2 2500
## # … with 18 more rows
Checking to see what NA purpose purchases Lee is making
lexp %>% filter(is.na(purpose))
## # A tibble: 21 × 17
## date name stree…¹ stree…² city state full_…³ count…⁴ profe…⁵ emplo…⁶
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 8/1/22 DEBBIE B… 2124 P… <NA> WILM… NC 28401 United… DIR OF… LEE KA…
## 2 9/1/22 DEBBIE B… 2124 P… <NA> WILM… NC 28401 United… DIR OF… LEE KA…
## 3 10/1/22 DEBBIE B… 2124 P… <NA> WILM… NC 28401 United… DIR OF… LEE KA…
## 4 7/1/22 CHASE HO… 5423 A… <NA> WILM… NC 28409 United… <NA> <NA>
## 5 7/15/22 CHASE HO… 5423 A… <NA> WILM… NC 28409 United… <NA> <NA>
## 6 8/1/22 CHASE HO… 5423 A… <NA> WILM… NC 28409 United… <NA> <NA>
## 7 8/15/22 CHASE HO… 5423 A… <NA> WILM… NC 28409 United… <NA> <NA>
## 8 9/1/22 CHASE HO… 5423 A… <NA> WILM… NC 28409 United… <NA> <NA>
## 9 9/15/22 CHASE HO… 5423 A… <NA> WILM… NC 28409 United… <NA> <NA>
## 10 10/1/22 CHASE HO… 5423 A… <NA> WILM… NC 28409 United… <NA> <NA>
## # … with 11 more rows, 7 more variables: purpose_type_code <chr>,
## # purpose <chr>, expenditure_type_desc <chr>, account_abbr <chr>,
## # form_of_payment_desc <chr>, amount <dbl>, sum_to_date <dbl>, and
## # abbreviated variable names ¹street_1, ²street_2, ³full_zip, ⁴country_name,
## # ⁵profession, ⁶employer_name
“E Electioneering communication Groups (other than PACs) making electioneering communications”
Every purchase has a Purpose Type Code, so I guess that balances out
# Lee's spending by purpose type code: number of payments (Total) and dollars
# (Amount) per code, largest amount first.
lexp %>%
  group_by(purpose_type_code) %>%
  summarise(Total = n(), Amount = sum(amount)) %>%
  arrange(desc(Amount))
## # A tibble: 10 × 3
## purpose_type_code Total Amount
## <chr> <int> <dbl>
## 1 A 61 1090693.
## 2 E 19 30510
## 3 B 13 9875.
## 4 P 1 7846.
## 5 O 52 3776.
## 6 K 3 1299
## 7 AD 1 905.
## 8 C 10 723.
## 9 N 1 200
## 10 I 3 139.
# Morgan's spending by stated purpose: number of payments (Total) and dollars
# (Amount) per purpose, largest amount first.
mexp %>%
  group_by(purpose) %>%
  summarise(Total = n(), Amount = sum(amount)) %>%
  arrange(desc(Amount))
## # A tibble: 22 × 3
## purpose Total Amount
## <chr> <int> <dbl>
## 1 Television 11 1057410.
## 2 Internet Ads 4 155000
## 3 Contribution to Political Party 5 15849
## 4 Print Media 13 15505.
## 5 Consultant - GOTV 1 9000
## 6 Payment Processing Fee 76 6127.
## 7 Compliance Services 3 3043.
## 8 Consultant - Media 3 3000
## 9 Photography Services 1 2858.
## 10 Office Supplies 21 1641.
## # … with 12 more rows
# Morgan's payees: number of payments (Total) and dollars paid (Amount) per
# recipient, largest amount first.
mexp %>%
  group_by(name) %>%
  summarise(Total = n(), Amount = sum(amount)) %>%
  arrange(desc(Amount))
## # A tibble: 34 × 3
## name Total Amount
## <chr> <int> <dbl>
## 1 Buying Time, LLC 11 1057410.
## 2 Blueprint Interactive 4 155000
## 3 North Carolina Democratic Party 1 15000
## 4 South Bridge Political Partners 1 9000
## 5 Lamar Media 1 8730
## 6 Stripe 76 6127.
## 7 The Strategy Group 2 4410.
## 8 Blue Wave Political Partners LLC 3 3043.
## 9 Maven Communications 3 3000
## 10 AGE Graphics 1 2757
## # … with 24 more rows
Morgan has no NA purposes, but has NA purpose codes. I guess both parties don’t want to fill out forms.
# Morgan's spending by purpose type code: number of payments (Total) and
# dollars (Amount) per code, largest amount first. Records with no code are
# grouped under NA.
mexp %>%
  group_by(purpose_type_code) %>%
  summarise(Total = n(), Amount = sum(amount)) %>%
  arrange(desc(Amount))
## # A tibble: 6 × 3
## purpose_type_code Total Amount
## <chr> <int> <dbl>
## 1 O 117 1251887.
## 2 G 5 15849
## 3 <NA> 56 4318.
## 4 D 2 428.
## 5 L 2 280
## 6 P 1 154.